### Supplementary Data Z
### Statistical models used for DE analysis and clustering analysis.
### R code

## DESeq2 for iVEC DE analysis
# dfcounts is the count table and colData is the design table; the design is a
# simple two-condition pairwise comparison on the `condition` column of colData.
library(DESeq2)
# Counts are rounded to whole numbers before building the DESeq2 data set.
dds <- DESeqDataSetFromMatrix(
  countData = round(dfcounts, 0),
  colData = colData,
  design = ~condition
)
dds <- DESeq(dds)
res <- results(dds)
# Convert with as.data.frame() so the gene identifiers (row names of res) are
# kept. The previous apply(res, 2, as.vector) dropped the names of every column,
# so the table was written with row numbers instead of gene identifiers.
output <- as.data.frame(res)
# col.names = NA writes an empty header cell above the row-name column.
write.table(output, "DESeq2_results.tsv", sep = "\t", col.names = NA, quote = FALSE)

## edgeR for iSM DE analysis
# dfcounts is the count table and group is the per-sample condition factor; the
# design is a simple two-condition pairwise comparison.
# Low-expressed genes are filtered out prior to DE analysis.
library(edgeR)
y <- DGEList(counts = round(dfcounts, 0), group = group)
CPM <- cpm(y)
nfactors <- length(levels(group))
# Size of the smallest non-empty condition. The previous tail(table(group), n = 1)
# took the size of the *last* condition, which is too strict when groups are
# unbalanced: genes expressed only in a smaller condition would be discarded.
group_sizes <- table(group)
minfactors <- min(group_sizes[group_sizes > 0])
total_sample <- length(group)
# Keep genes with CPM > 1 in at least as many samples as the smallest condition,
# i.e. genes that can be expressed throughout at least one condition.
keep <- rowSums(CPM > 1) >= minfactors
# The low-expressed genes are set aside so they can be added back to the final
# output with missing values filled in.
# NOTE(review): yleft is not used again in this script, so the filtered genes
# are absent from edgeR_DEresults.tsv as written here -- confirm where the
# merge-back step happens.
yleft <- y[!keep, , keep.lib.sizes = TRUE]
# Library sizes are recomputed from the retained genes only.
y <- y[keep, , keep.lib.sizes = FALSE]
y <- calcNormFactors(y, method = "TMM")
design <- model.matrix(~ 1 + group)
# Common -> trended -> tagwise dispersion estimation for the GLM pipeline.
y <- estimateGLMCommonDisp(y, design, verbose = FALSE)
y <- estimateGLMTrendedDisp(y, design)
y <- estimateGLMTagwiseDisp(y, design)
fit <- glmFit(y, design)
# glmLRT() is called with its default coefficient (per the edgeR documentation,
# the last column of the design matrix, i.e. the group effect in this design).
lrt <- glmLRT(fit)
pvals <- lrt$table$PValue
# Benjamini-Hochberg adjustment over the tested (retained) genes only.
FDR <- p.adjust(pvals, method = "BH")
output <- cbind(lrt$table, FDR)
write.table(output, "edgeR_DEresults.tsv", sep = "\t", col.names = NA, quote = FALSE)

## clustering analysis
# For the clustering analysis, we first generate a heatmap to assess the overall
# expression pattern.
# dfcounts is the count table; markerGenes are the signature genes identified
# from different cell types (included in the Supplementary Dataset DE results
# Excel spreadsheets).
library(gplots)
library(edgeR)
dfcpm <- cpm(as.matrix(dfcounts), log = FALSE)
# log2 transform with a pseudo-count of 5 added to the CPM values.
dflog <- log2(dfcpm + 5)
# Row-wise z-scaling: scale() is applied to each gene; apply() returns the
# result with genes in columns, hence the transpose.
dfnorm <- as.matrix(t(apply(dflog, 1, scale)))
colnames(dfnorm) <- colnames(dflog)
rownames(dfnorm) <- rownames(dflog)
pdf("heatmap.pdf", width = 10, height = 20, compress = FALSE)
# tryCatch(..., finally = dev.off()) closes the PDF device even when heatmap.2()
# fails, so a failed run does not leave a dangling graphics device.
heatOBJ <- tryCatch(
  heatmap.2(
    dfnorm[markerGenes, , drop = FALSE],
    Rowv = TRUE, Colv = TRUE, dendrogram = "both", symm = FALSE,
    scale = "none", revC = TRUE,
    # Distance is 1 - Pearson correlation between rows of the supplied matrix.
    distfun = function(x) as.dist(1 - cor(t(x))),
    # Argument name spelled out in full; the previous `hclust=` only reached
    # `hclustfun` through partial argument matching.
    hclustfun = function(x) hclust(x, method = "ward.D2"),
    key = TRUE, keysize = 0.8, key.title = "", key.xlab = "z-score",
    density.info = "none", trace = "none", hline = NA, vline = NA, cexCol = 1,
    lwid = c(0.5, 4), lhei = c(0.2, 4, 1),
    lmat = rbind(c(0, 3), c(2, 1), c(0, 4)),
    srtCol = 60, margins = c(15, 20)
  ),
  finally = dev.off()
)
# Depending on the clustering pattern, we group all markerGenes into k groups
# (k=3 for endo_vs_fibro iVEC and k=4 for fibro_vs_smoothCell iSM).
# k is not set in this script and must be defined before this point.
testTree <- cutree(as.hclust(heatOBJ$rowDendrogram), k = k)
# Order genes by cluster label, highest label first.
testTree <- testTree[order(testTree, decreasing = TRUE)]
dftree <- data.frame(ANNO = names(testTree), cluster = testTree)
write.table(dftree, "table_cutree_clustering_results.tsv", sep = "\t", row.names = FALSE, quote = FALSE)

 

